1 Setup

# Load packages
library(sf)
library(xml2)
library(tidyverse)
library(tidybayes)
library(brms)
# Define genus level taxon groups (plus one family FAVI)
taxon_groups <- list(
  PORI = c("PPOR", "PFUR", "PDIV", "PAST", "PORI"),
  ORBI = c("OFAV", "OANN", "OFRA", "ORBI"),
  FAVI = c("CNAT", "DLAB", "PSTR", "PCLI", "MARE", "FAVI"),
  AGAR = c("AFRA", "AAGA", "AHUM", "ALAM", "AGAR"),
  MADR = c("MAUR", "MSEN", "MDEC", "MPHA", "MADR"),
  SOLE = c("SHYA", "SBOU", "SOLE"),
  SCOL = c("SLAC", "SCUB", "SCOL"),
  SIDE = c("SSID", "SRAD", "SIDE"),
  MYCE = c("MFER", "MLAM", "MALI", "MYCE"),
  OCUL = c("OROB", "ODIF", "OCUL")
)

# Convert to lookup tibble
taxon_lookup <- enframe(taxon_groups, name = "taxon_group", value = "taxon") %>%
  unnest(taxon)



# Define juvenile family level taxon groups (following DRM survey convention)
taxon_groups_juv <- list(
  MUSS = c("ISIN", "ISOP", "MANG", "MYCE", "SCOL", "MUSS"),
  FAVI = c("FAVI", "FFRA", "MARE"),
  MEAN = c("MMEA", "MEAN", "DCYL", "DSTO", "EFAS")
)
# Convert to lookup tibble
taxon_lookup_juv <- enframe(taxon_groups_juv, name = "taxon_group", value = "taxon") %>%
  unnest(taxon)


# Order levels of Habitat Types
type_levels <- c("Nearshore Ridge Complex", "Inner Reef", "Middle Reef",
                 "Outer Reef", "Aggregated Patch Reef")
type_labels <- c("NRC", "IR", "MR", "OR", "APR")
names(type_labels) <- type_levels

# Order survey datasets
dataset_levels <- c("dca", "tt21", "tt23", "drm")
dataset_labels <- c("2017—DCA", "2021—TT", "2023—TT", "2024—Shedd")
names(dataset_labels) <- dataset_levels

2 Import data

2.1 2011 (NSU) and 2017 (DCA) ESA Surveys

2.2 2017 Dial Cordy Recon Survey

DCA site metadata

# Site metadata
# Read in site coordinates
dca_sitemd0 <- readxl::read_xlsx("data/2017_dca_recon/Recon_Site_Coordinates_Extracted.xlsx") %>%
  janitor::clean_names()

# All sites have start and end coordinates...

# Tidy and Calculate midpoint per transect
dca_sitemd <- dca_sitemd0 %>%
  mutate(
    depth = abs(as.numeric(depth)),
    across(c(latitude, longitude), as.numeric)
  ) %>%
  group_by(site = transect) %>%
  summarize(
    latitude = mean(latitude, na.rm = TRUE),
    longitude = mean(longitude, na.rm = TRUE),
    depth = mean(depth, na.rm = TRUE),
    .groups = "drop"
  )

DCA coral data

# Read in survey data
dca0 <- readxl::read_xlsx("data/2017_dca_recon/Compiled_DCA_RECON_Belt_data.xlsx") %>%
  janitor::clean_names()

dca <- dca0 %>%
  select(1:18) %>%
  rename(site = site_name) %>%
  mutate(site = factor(site)) %>%
  select(site, taxon = coral_species, max_width_cm = max_size_cm)

# Adjust/corrects species IDs
sort(unique(dca$taxon))
##  [1] "AAGA"               "ACER"               "AFRA"              
##  [4] "AGA SP"             "AHUM"               "ALAM"              
##  [7] "CNAT"               "CORAL"              "Cup Coral"         
## [10] "DLAB"               "DSTO"               "EFAS"              
## [13] "LCUC"               "MAD SP"             "MADSP"             
## [16] "MALI"               "MARE"               "MCAV"              
## [19] "MDEC"               "MLAM"               "MMEA"              
## [22] "MPHA"               "Mycetophyllia spp." "MYCSP"             
## [25] "OANN"               "ODIF"               "OFAV"              
## [28] "OFAV\\"             "PAST"               "PCLI"              
## [31] "PFUR"               "PPOR"               "PSTR"              
## [34] "SBOU"               "Scolymia Spp"       "SCUB"              
## [37] "Sid SP"             "SID SP"             "SID SP."           
## [40] "SIDSP"              "SINT"               "SRAD"              
## [43] "SSID"
dca <- dca %>%
  mutate(taxon = case_when(
    taxon == "AGA SP" ~ "AGAR",
    taxon == "LCUC" ~ "HCUC",
    taxon %in% c("MYCSP", "Mycetophyllia spp.") ~ "MYCE",
    taxon == "OFAV\\" ~ "OFAV",
    taxon %in% c("MAD SP", "MADSP") ~ "MADR",
    taxon == "Scolymia Spp" ~ "SCOL",
    taxon %in% c("SIDSP", "Sid SP", "SID SP.", "SID SP") ~ "SIDE",
    TRUE ~ taxon
  ))
sort(unique(dca$taxon))
##  [1] "AAGA"      "ACER"      "AFRA"      "AGAR"      "AHUM"      "ALAM"     
##  [7] "CNAT"      "CORAL"     "Cup Coral" "DLAB"      "DSTO"      "EFAS"     
## [13] "HCUC"      "MADR"      "MALI"      "MARE"      "MCAV"      "MDEC"     
## [19] "MLAM"      "MMEA"      "MPHA"      "MYCE"      "OANN"      "ODIF"     
## [25] "OFAV"      "PAST"      "PCLI"      "PFUR"      "PPOR"      "PSTR"     
## [31] "SBOU"      "SCOL"      "SCUB"      "SIDE"      "SINT"      "SRAD"     
## [37] "SSID"
# Filter out unidentified corals
dca <- dca %>%
  filter(!taxon %in% c("CORAL", "Cup Coral"))

# Write long data to file
write_csv(dca, file = "data/processed/dca_2017_long.csv")


# Convert to count data
# Add explicit zeros for any taxon/size class missing at any site
dca_counts <- dca %>%
  mutate(class = ifelse(max_width_cm >= 4, ">4cm", "<4cm")) %>%
  count(site, taxon, class) %>%
  complete(site, taxon, class = c(">4cm", "<4cm"), fill = list(n = 0)) %>%
  # # Don't create zeros for MEAN/MUSS/FAVI adults, since these IDs only applied to juv
  filter(!(taxon %in% c("MEAN", "MUSS", "FAVI") & class == ">4cm" & n == 0))

write_csv(dca_counts, file = "data/processed/dca_2017_counts.csv")

# Aggregate count data based on taxonomic groups defined above
dca_counts_ag <- dca_counts %>%
  left_join(taxon_lookup, by = "taxon") %>%
  mutate(taxon = coalesce(taxon_group, taxon)) %>%
  select(-taxon_group) %>%
  group_by(across(-n)) %>%
  summarize(n = sum(n), .groups = "drop")



# Further aggregate juvenile counts to family (following DRM methods)
dca_counts_ag <- dca_counts_ag %>%
  left_join(taxon_lookup_juv, by = "taxon") %>%
  mutate(
    taxon = if_else(class == "<4cm" & !is.na(taxon_group), taxon_group, taxon)
  ) %>%
  select(-taxon_group) %>%
  group_by(across(-n)) %>%
  summarize(n = sum(n), .groups = "drop")


write_csv(dca_counts_ag, file = "data/processed/dca_2017_counts_ag.csv")

2.3 2021 Tetra Tech Recon and ESA Surveys

2021 TT site metadata

# site metadata
tt21_sitemd0 <- read_csv("data/2021_tt_recon_esa/midpoints_latlon.csv") %>%
  janitor::clean_names()

tt21_sitemd <- tt21_sitemd0 %>%
  mutate(name = str_remove(name, "A$")) %>%
  select(site = name, longitude = lon, latitude = lat)

2021 TT coral data

# coral data - recon belt transects
tt21recon0 <- readxl::read_xlsx("data/2021_tt_recon_esa/Recon 30x1m Coral Belt Transect.xlsx") %>%
  janitor::clean_names()

tt21recon <- tt21recon0 %>%
  select(site, taxon = id_abbrev, coral_length_cm, coral_width_cm) %>%
  mutate(taxon = toupper(taxon), site = factor(site)) %>%
  mutate(across(c(coral_length_cm, coral_width_cm), as.numeric)) %>%
  mutate(max_width_cm = pmax(coral_length_cm, coral_width_cm)) %>%
  select(site, taxon, max_width_cm)

# ESA survey data
tt21esa0 <- readxl::read_xlsx("data/2021_tt_recon_esa/ESA Coral Belt Transect.xlsx") %>%
  janitor::clean_names()

sort(unique(tt21esa0$esa_id))
## [1] "0.0"                  "Acropora cervicornis" "ML QC Not ESA"       
## [4] "Orbicella faveolata"  "Orbicella franksi"    "Outside Belt"
tt21esa <- tt21esa0 %>%
  mutate(site = factor(site),
         taxon = case_when(
           esa_id == "Orbicella franksi" ~ "OFRA",
           esa_id == "Orbicella faveolata" ~ "OFAV",
           esa_id == "Acropora cervicornis" ~ "ACER")) %>%
  filter(!is.na(taxon)) %>%
  mutate(max_width_cm = pmax(coral_length_cm, coral_width_cm)) %>%
  select(site, taxon, max_width_cm)

# Combine Recon and ESA survey data
tt21 <- bind_rows(tt21recon, tt21esa)


# Check taxa names
sort(unique(tt21recon$taxon))
##  [1] "0"                       "AAGA"                   
##  [3] "AFRAG"                   "ALAM"                   
##  [5] "CNAT"                    "DLAB"                   
##  [7] "DSTO"                    "EFAS"                   
##  [9] "FFRAG"                   "JUVENILE-UNIDENTIFIABLE"
## [11] "MALC"                    "MANG"                   
## [13] "MCAV"                    "MDEC"                   
## [15] "MMEA"                    "MPHA"                   
## [17] "MYALI"                   "MYLAM"                  
## [19] "ODIF/OROB"               "PAST"                   
## [21] "PDCLIV"                  "PDSTR"                  
## [23] "PHYLLANGIA AMERICANA"    "PPOR"                   
## [25] "SBOU"                    "SCOLYMIA CUBENSIS"      
## [27] "SCOLYMIA LACERA"         "SINT"                   
## [29] "SRAD"                    "SSID"                   
## [31] "XESTO"
# Filter out unidentified OR NON-CORAL taxa
tt21 <- tt21 %>%
  filter(!taxon %in% c("0", "JUVENILE-UNIDENTIFIABLE", "XESTO", "MALC"))

# Adjust/corrects species IDs
tt21 <- tt21 %>%
  mutate(taxon = case_when(
    taxon == "AFRAG" ~ "AFRA",
    taxon == "FFRAG" ~ "FFRA",
    taxon == "MYALI" ~ "MALI",
    taxon == "MYFER" ~ "MFER",
    taxon == "MYLAM" ~ "MLAM",
    taxon == "ODIF/OROB" ~ "OCUL",
    taxon == "PDCLIV" ~ "PCLI",
    taxon == "PDSTR" ~ "PSTR",
    taxon == "PHYLLANGIA AMERICANA" ~ "PAME",
    taxon == "SCOLYMIA CUBENSIS" ~ "SCUB",
    taxon == "SCOLYMIA LACERA" ~ "SLAC",
    TRUE ~ taxon
  ))
sort(unique(tt21$taxon))
##  [1] "AAGA" "ACER" "AFRA" "ALAM" "CNAT" "DLAB" "DSTO" "EFAS" "FFRA" "MALI"
## [11] "MANG" "MCAV" "MDEC" "MLAM" "MMEA" "MPHA" "OCUL" "OFAV" "OFRA" "PAME"
## [21] "PAST" "PCLI" "PPOR" "PSTR" "SBOU" "SCUB" "SINT" "SLAC" "SRAD" "SSID"
# Write long data to file
write_csv(tt21, file = "data/processed/tt_2021_long.csv")





# Count
# Add explicit zeros for any taxon/size class missing at any site
tt21_counts <- tt21 %>%
  mutate(class = ifelse(max_width_cm >= 4, ">4cm", "<4cm")) %>%
  count(site, taxon, class) %>%
  complete(site, taxon, class = c(">4cm", "<4cm"), fill = list(n = 0)) %>%
  # Don't create zeros for MEAN/MUSS/FAVI adults, since these IDs only applied to juv
  filter(!(taxon %in% c("MEAN", "MUSS", "FAVI") & class == ">4cm" & n == 0))

write_csv(tt21_counts, file = "data/processed/tt_2021_counts.csv")




# Aggregate count data based on taxonomic groups defined above
tt21_counts_ag <- tt21_counts %>%
  left_join(taxon_lookup, by = "taxon") %>%
  mutate(taxon = coalesce(taxon_group, taxon)) %>%
  select(-taxon_group) %>%
  group_by(across(-n)) %>%
  summarize(n = sum(n), .groups = "drop")

# Further aggregate juvenile counts to family (following DRM methods)
tt21_counts_ag <- tt21_counts_ag %>%
  left_join(taxon_lookup_juv, by = "taxon") %>%
  mutate(
    taxon = if_else(class == "<4cm" & !is.na(taxon_group), taxon_group, taxon)
  ) %>%
  select(-taxon_group) %>%
  group_by(across(-n)) %>%
  summarize(n = sum(n), .groups = "drop")

write_csv(tt21_counts_ag, file = "data/processed/tt21_counts_ag.csv")

2.4 2023 Tetra Tech Impact Survey

TT site metadata

# Site metadata
tt_sitemd <- readxl::read_xlsx("data/2023_tt_impact/Impact site tracking.xlsx", skip = 1) %>%
  janitor::clean_names()

tt_sitemd <- tt_sitemd %>%
  mutate(site = transect_name,
         latitude = as.numeric(actual_start_y),
         longitude = as.numeric(actual_start_x)) %>%
  select(site, latitude, longitude) 

# Many sites missing coords in sheet.... whats up with that
tt_sitemd <- drop_na(tt_sitemd, longitude)

TT coral data

tt0 <- readxl::read_xlsx("data/2023_tt_impact/Impact Raw Data 05 31 2024.xlsx") %>%
  janitor::clean_names() 

tt <- tt0 %>%
  select(site = transect_name, depth_ft_start,
         taxon = id_abbrev, coral_length_cm, coral_width_cm) %>%
  filter(taxon != "Xesto") %>%
  mutate(taxon = toupper(taxon)) %>%
  mutate(across(c(coral_length_cm, coral_width_cm), as.numeric)) %>%
  mutate(site = factor(site))

tt <- tt %>%
  mutate(max_width_cm = pmax(coral_length_cm, coral_width_cm)) %>%
  select(site, taxon, max_width_cm)

# Check taxa names
sort(unique(tt$taxon))
##  [1] "?"         "AAGA"      "ACER"      "AFRA"      "AFRAG"     "ALAM"     
##  [7] "ASP"       "ASP."      "CNAT"      "DLAB"      "DSTO"      "EFAS"     
## [13] "FFRA"      "HCUC"      "ID-ABBREV" "MCAV"      "MCAV?"     "MDEC"     
## [19] "MHEARD"    "MMEA"      "MSEN"      "MSP."      "MUSSID"    "MYALI"    
## [25] "MYFER"     "MYLAM"     "NONE"      "OFAV"      "OFR"       "OFRA"     
## [31] "OROB"      "PAME"      "PAST"      "PCLI"      "PCLI?"     "PDIV"     
## [37] "PPOR"      "PSP"       "PSP."      "PSTR"      "SBOU"      "SCUB"     
## [43] "SHYA"      "SINT"      "SLAC"      "SRAD"      "SSID"      "SSP."     
## [49] "STOK"
# Filter out unidentified taxa
tt <- tt %>%
  filter(!taxon %in% c("?", "ID-ABBREV", "NONE", "MHEARD"))

# Adjust/corrects species IDs
tt <- tt %>%
  mutate(taxon = case_when(
    taxon == "AFRAG" ~ "AFRA",
    taxon %in% c("ASP", "ASP.") ~ "AGAR",
    taxon == "MCAV?" ~ "MCAV",
    taxon == "MSP." ~ "MADR",
    taxon == "MUSSID" ~ "MUSS",
    taxon == "MYALI" ~ "MALI",
    taxon == "MYFER" ~ "MFER",
    taxon == "MYLAM" ~ "MLAM",
    taxon == "OFR" ~ "OFRA",
    taxon == "PCLI?" ~ "PCLI",
    taxon %in% c("PSP", "PSP.") ~ "PORI",
    taxon == "SSP." ~ "SIDE",
    taxon == "STOK" ~ "DSTO",
    TRUE ~ taxon
  ))
sort(unique(tt$taxon))
##  [1] "AAGA" "ACER" "AFRA" "AGAR" "ALAM" "CNAT" "DLAB" "DSTO" "EFAS" "FFRA"
## [11] "HCUC" "MADR" "MALI" "MCAV" "MDEC" "MFER" "MLAM" "MMEA" "MSEN" "MUSS"
## [21] "OFAV" "OFRA" "OROB" "PAME" "PAST" "PCLI" "PDIV" "PORI" "PPOR" "PSTR"
## [31] "SBOU" "SCUB" "SHYA" "SIDE" "SINT" "SLAC" "SRAD" "SSID"
# Write long data to file
write_csv(tt, file = "data/processed/tt_2024_long.csv")





# Count
# Add explicit zeros for any taxon/size class missing at any site
tt_counts <- tt %>%
  mutate(class = ifelse(max_width_cm >= 4, ">4cm", "<4cm")) %>%
  count(site, taxon, class) %>%
  complete(site, taxon, class = c(">4cm", "<4cm"), fill = list(n = 0)) %>%
  # Don't create zeros for MEAN/MUSS/FAVI adults, since these IDs only applied to juv
  filter(!(taxon %in% c("MEAN", "MUSS", "FAVI") & class == ">4cm" & n == 0))

write_csv(tt_counts, file = "data/processed/tt_2024_counts.csv")




# Aggregate count data based on taxonomic groups defined above
tt_counts_ag <- tt_counts %>%
  left_join(taxon_lookup, by = "taxon") %>%
  mutate(taxon = coalesce(taxon_group, taxon)) %>%
  select(-taxon_group) %>%
  group_by(across(-n)) %>%
  summarize(n = sum(n), .groups = "drop")

# Further aggregate juvenile counts to family (following DRM methods)
tt_counts_ag <- tt_counts_ag %>%
  left_join(taxon_lookup_juv, by = "taxon") %>%
  mutate(
    taxon = if_else(class == "<4cm" & !is.na(taxon_group), taxon_group, taxon)
  ) %>%
  select(-taxon_group) %>%
  group_by(across(-n)) %>%
  summarize(n = sum(n), .groups = "drop")

write_csv(tt_counts_ag, file = "data/processed/tt_counts_ag.csv")

2.5 2024 Shedd Survey

Shedd site metadata

# Site metadata
drm_sitemd <- readxl::read_xlsx("data/2024_shedd_drm/site_metadata.xlsx") %>%
  janitor::clean_names() %>%
  mutate(site = as.character(drm_site_id)) %>%
  select(site, latitude = lat, longitude = lon) %>%
  mutate(depth = NA)

Shedd coral data

# Adult coral data from main DRM surveys
#adults0 <- read_csv("data/2024_shedd_drm/DRM_broward_corals.csv")
#adults0 %>% filter(team == "Shedd Aquarium")

# Most sites were included in main DRM database for 2024 -- Import these
adultst1t2 <- readxl::read_xlsx("data/2024_shedd_drm/2024ANU_RawCoralDataTransect1and2_Shedd.xlsx") %>%
  janitor::clean_names() %>%
  filter(subregion == "Broward-Miami", team == "Shedd Aquarium") %>%
  select(site, transect_num, species, width, height)
adultst3t4 <- readxl::read_xlsx("data/2024_shedd_drm/2024ANU_RawCoralDataTransect3and4_Shedd.xlsx") %>%
  janitor::clean_names() %>%
  filter(subregion == "Broward-Miami", team == "Shedd Aquarium") %>%
  select(site, transect_num, species, width, height)

# 9 of our PEV sites were removed from DRM database to avoid oversaturating the ares -- Import these separately
removedt1t2 <- readxl::read_xlsx(
  "data/2024_shedd_drm/2024_DRM_Broward_RemovedSites_T1-T4_Shedd.xlsx", sheet = "Removed Sites T1-T2") %>%
  janitor::clean_names() %>%
  select(site, transect_num, species, width, height)
removedt3t4 <- readxl::read_xlsx(
  "data/2024_shedd_drm/2024_DRM_Broward_RemovedSites_T1-T4_Shedd.xlsx", sheet = "Removed Sites T3-T4") %>%
  janitor::clean_names() %>%
  select(site, transect_num, species, width, height)

# Combine all adult coral data for Shedd DRM surveys at PEV
adults0 <- bind_rows(adultst1t2, adultst3t4, removedt1t2, removedt3t4)
# Convert adult data to long format
adults_long <- adults0 %>%
  mutate(max_width_cm = pmax(width, height, na.rm = TRUE)) %>%
  mutate(max_width_cm = as.character(max_width_cm)) %>%
  mutate(site = str_remove(site, "^AA")) %>%
  select(site, transect_num, taxon = species, max_width_cm) %>%
  drop_na(taxon)     # DROPS when taxon is blank, this is when no corals >4cm were observed

# Import juvenile counts from main DRM dataset
juv <- readxl::read_xlsx("data/2024_shedd_drm/2024ANU_JuvenileCoralData_Shedd.xlsx") %>%
  janitor::clean_names() %>%
  filter(subregion == "Broward-Miami", team == "Shedd Aquarium")

# Import juvenile counts from sites that were removed from main DRM dataset
removed_juv <- readxl::read_xlsx("data/2024_shedd_drm/Shedd_removed_sites_Juveniles_2024.xlsx") %>%
  janitor::clean_names() %>%
  # Missing values in count data should be zero counts (unique to this datasheet from FWC)
  mutate(across(ends_with("_ct"), ~replace_na(., 0)))

# Combine juvenile data
juv0 <- bind_rows(juv, removed_juv) %>%
  mutate(site = str_remove(site, "^AA")) %>%
  select(site, transect_num, ends_with("ct")) %>%
  rename(MCAV = montastraea_ct, MUSS = mussinae_ct, FAVI = faviinae_ct, MEAN = meandrinidae_ct)

# Convert juvenile data to long format
juv_long <- juv0 %>%
  pivot_longer(c(MUSS, FAVI, MEAN, MCAV), names_to = "taxon", values_to = "n") %>%
  mutate(max_width_cm = "<4") %>%
  uncount(n)



# Other juvenile taxa counts from Transects 1 and 2 (DRM 'bonus data')
t1t2bonus <- read_csv("data/2024_shedd_drm/T1_T2_bonus_data.csv") %>%
  janitor::clean_names() %>%
  mutate(site = replace_na(site, "NA")) %>%    # Because one site is called "NA"
  mutate(transect_num = parse_number(transect))
t1t2juv <- t1t2bonus %>%
  select(site, transect_num, starts_with("small")) %>%
  rename_with(~ toupper(gsub("^small_", "", .x)), starts_with("small_"))
t1t2juv_long <- t1t2juv %>%
  pivot_longer(3:10, names_to = "taxon", values_to = "n") %>%
  mutate(max_width_cm = "<4") %>%
  uncount(n)
# Replace site names in t1t2 bonus data with the correct DRM site ID
penipsites <- readxl::read_xlsx("data/2024_shedd_drm/site_metadata.xlsx") %>%
  janitor::clean_names()
t1t2juv_long_updated <- t1t2juv_long %>%
  left_join(penipsites %>% select(site, drm_site_id), by = "site") %>%
  mutate(site = as.character(drm_site_id)) %>%
  select(-drm_site_id)


# Combine all data
drm_long <- bind_rows(adults_long, juv_long, t1t2juv_long_updated) %>%
  mutate(team = "Shedd Aquarium")

# Check species names
sort(unique(drm_long$taxon))
##  [1] "AAGA" "ACER" "AFRA" "CNAT" "DLAB" "DSTO" "EFAS" "FAVI" "FFRA" "MALI"
## [11] "MAUR" "MCAV" "MDEC" "MEAN" "MMEA" "MUSS" "OANN" "OFAV" "OFRA" "PAST"
## [21] "PCLI" "PFUR" "PPOR" "PSTR" "SBOU" "SCUB" "SINT" "SRAD" "SSID"
write_csv(drm_long, file = "data/processed/drm_2024_long.csv")


# COUNT based on rules
# ✅ Updated Rules Summary for counting from DRM/Shedd data:
# Juvenile taxa (searched for in <4cm size class only):
#    "MEAN", "MUSS", "FAVI"
#    → these should only ever appear in <4cm, never >4cm, and should not be zero-filled for adults.
# Other juvenile-capable taxa:
#    "MCAV", "SSID", "SRAD", "PAST", "PPOR", "SINT", "SBOU", "AAGA", "MAUR"
#    → these can be counted in both >4cm and <4cm, but only in <4cm if juveniles were searched on that transect and team.
# Transect-based search rules still apply:
# Transects 1 & 2: all adult taxa always searched. Juvenile search depends on team:
#    "Shedd Aquarium" → all juvenile taxa above searched
#    others → only MEAN, MUSS, FAVI, MCAV
# Transects 3 & 4:
#    only subset of adult taxa searched (adult_taxa_t3t4)
#    only juveniles: MEAN, MUSS, FAVI, MCAV

# Step 1: Define size classes
drm_classed <- drm_long %>%
  mutate(class = case_when(as.numeric(max_width_cm) >= 4 ~ ">4cm",
                           max_width_cm == "<4" ~ "<4cm"))

# Step 2: Define species sets
all_taxa <- unique(drm_classed$taxon)
adult_taxa_t3t4 <- c("CNAT", "DSTO", "DLAB", "MMEA", "MANG", "MALI", 
                     "MFER", "MLAM", "PCLI", "PSTR")
juv_only_taxa <- c("MEAN", "MUSS", "FAVI")
juv_both_taxa <- c("MCAV", "SSID", "SRAD", "PAST", "PPOR", "SINT", "SBOU", "AAGA", "MAUR")
all_juv_taxa <- c(juv_only_taxa, juv_both_taxa)

# Step 3: Build search grid per site × transect × team
search_grid <- drm_classed %>%
  distinct(site, transect_num, team) %>%    # if multiple teams in data, remove value for team
  mutate(
    searched_taxa_class = pmap(list(transect_num, team), function(transect, team) {
      # Helper: define juv taxa allowed for this transect/team
      juv_taxa <- if (transect %in% c(1, 2)) {
        if (team == "Shedd Aquarium") {      # Shedd searched for other juv taxa on T1 and T2, other DRM survey teams did not
          all_juv_taxa
        } else {
          c(juv_only_taxa, "MCAV")
        }
      } else {
        c(juv_only_taxa, "MCAV")
      }
      
      # Adults always searched in 1 & 2, subset in 3 & 4
      adult_taxa <- if (transect %in% c(1, 2)) {
        setdiff(all_taxa, juv_only_taxa)  # exclude juv-only taxa
      } else {
        adult_taxa_t3t4
      }

      # Build grid
      bind_rows(
        expand_grid(taxon = adult_taxa, class = ">4cm"),
        expand_grid(taxon = juv_taxa, class = "<4cm")
      )
    })
  ) %>%
  unnest(searched_taxa_class)

# Step 4: Count observations
counts <- drm_classed %>%
  group_by(site, transect_num, team, taxon, class) %>%
  summarize(n = n(), .groups = "drop")

# Step 5: Join with grid and fill in zeros where appropriate
final_counts <- search_grid %>%
  left_join(counts, by = c("site", "transect_num", "team", "taxon", "class")) %>%
  mutate(n = replace_na(n, 0))

write_csv(final_counts, file = "data/processed/drm_2024_counts.csv")




# AGGREGATE COUNT DATA
# Aggregate taxa
final_counts_ag <- final_counts %>%
  left_join(taxon_lookup, by = "taxon") %>%
  mutate(taxon = coalesce(taxon_group, taxon)) %>%
  select(-taxon_group) %>%
  group_by(across(-n)) %>%
  summarize(n = sum(n), .groups = "drop")

write_csv(final_counts_ag, file = "data/processed/drm_2024_counts_ag.csv")

2.6 Habitat classification polygons

# --- STEP 1: Load KML Polygons ---
polygons <- st_read("data/Habitat classifications.kml")  # update path as needed
## Reading layer `Habitat classifications' from data source 
##   `/Users/rosscunning/Projects/PENIP/data/Habitat classifications.kml' 
##   using driver `KML'
## Simple feature collection with 190 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XYZ
## Bounding box:  xmin: -80.11618 ymin: 25.97477 xmax: -80.06143 ymax: 26.25943
## z_range:       zmin: 0 zmax: 0
## Geodetic CRS:  WGS 84
# --- STEP 2: Extract Attributes from HTML Description ---
extract_attrs <- function(desc) {
  if (is.na(desc) || desc == "") {
    return(tibble(Habitat = NA, Type = NA, Modifier = NA, Region = NA, Type2 = NA))
  }
  html <- read_html(desc)
  rows <- xml_find_all(html, "//table//table//tr")
  keys <- rows %>% xml_find_all(".//td[1]") %>% xml_text(trim = TRUE)
  vals <- rows %>% xml_find_all(".//td[2]") %>% xml_text(trim = TRUE)
  n <- min(length(keys), length(vals))
  named_vals <- set_names(vals[1:n], keys[1:n])
  tibble(
    Habitat  = named_vals[["Habitat"]],
    Type     = named_vals[["Type"]],
    Modifier = named_vals[["Modifier"]],
    Region   = named_vals[["Region"]],
    Type2    = named_vals[["Type2"]]
  )
}

# Apply function and combine with spatial geometries
attrs <- map_dfr(polygons$Description, extract_attrs)
polygons_clean <- bind_cols(polygons %>% select(-Description), attrs)

# --- STEP 3: Prepare Site Coordinate Data ---
# Get all site coordinates, and assign north and south
allsitemd <- bind_rows(.id = "dataset",
  drm = drm_sitemd,
  dca = dca_sitemd,
  tt23 = tt_sitemd,
  tt21 = tt21_sitemd
) %>%
  mutate(dir = if_else(latitude > 26.093570, "N", "S"))
points <- st_as_sf(allsitemd, coords = c("longitude", "latitude"), crs = 4326)

# --- STEP 4: Validate Geometry and Match CRS ---
polygons_clean <- polygons_clean %>%
  st_zm(drop = TRUE, what = "ZM") %>%
  st_make_valid() %>%
  st_transform(st_crs(points))

sf_use_s2(FALSE)  # prevent s2 geometry issues

# --- STEP 5: Spatial Join ---
joined <- st_join(points, polygons_clean, join = st_within)

joined_df <- joined %>%
  mutate(longitude = st_coordinates(.)[,1],
         latitude = st_coordinates(.)[,2]) %>%
  st_drop_geometry()

allsitemd <- joined_df

# Visualize habitat classifications
(polyplot <- polygons_clean %>% 
  #filter(Type != "Sand") %>%
  ggplot() +
  geom_sf(aes(fill = Type), color = "black", size = 0.2, alpha = 0.6) +
  scale_fill_brewer(palette = "Set3", na.value = "gray80") +
  theme_minimal() +
  labs(title = "Habitat Polygons Colored by Type", fill = "Type") +
  theme(legend.position = "right") +
  xlim(-80.11, -80.079) +
  ylim(26.0675, 26.11))

# 'Sand' overlaps some of the other polygons instead of just surrounding them...
# If a point is classified as Sand AND something else, remove Sand...
multiclass <- allsitemd %>%
  group_by(site) %>%
  filter(n() > 1)

allsitemd_clean <- allsitemd %>%
  group_by(site) %>%
  filter(!(Type == "Sand" & n() > 1)) %>%
  ungroup()

3 Filter data

3.1 By habitat type

# Select sites in Nearshore Ridge Complex, Inner Reef, and Middle Reef, Outer Reef and Aggregated Patch Reef
selected <- allsitemd_clean %>%
  filter(Type %in% c("Nearshore Ridge Complex", "Inner Reef", "Middle Reef", 
                     "Artificial", "Outer Reef", "Aggregated Patch Reef"))

# Plot selected sites
polyplot + 
  geom_point(data = selected, 
             aes(x = longitude, y = latitude, shape = dataset), 
             inherit.aes = FALSE, alpha = 0.6) +
  scale_shape_manual(values = c(2, 3, 4, 5))

4 Map datasets

(dca_plot <- polyplot + 
  geom_point(
    data = filter(selected, dataset == "dca"),
    aes(x = longitude, y = latitude), pch = 13, inherit.aes = FALSE, alpha = 0.6) +
  labs(title = "2017 — DCA Survey"))

(tt21_plot <- polyplot + 
  geom_point(
    data = filter(selected, dataset == "tt21"),
    aes(x = longitude, y = latitude), pch = 13, inherit.aes = FALSE, alpha = 0.6) +
  labs(title = "2021 — Tetra Tech Survey"))

(tt23_plot <- polyplot + 
  geom_point(
    data = filter(selected, dataset == "tt23"),
    aes(x = longitude, y = latitude), pch = 13, inherit.aes = FALSE, alpha = 0.6) +
  labs(title = "2023 — Tetra Tech Survey"))

(drm_plot <- polyplot + 
  geom_point(
    data = filter(selected, dataset == "drm"),
    aes(x = longitude, y = latitude), pch = 13, inherit.aes = FALSE, alpha = 0.6) +
  labs(title = "2024 — Shedd Survey"))

# Combine all aggregated count data and filter for selected sites
df <- bind_rows(.id = "dataset",
  drm = final_counts_ag,
  dca = dca_counts_ag,
  tt23 = tt_counts_ag,
  tt21 = tt21_counts_ag
) %>%
  filter(site %in% selected$site) %>%
  select(dataset, site, transect_num, taxon, class, n) %>%
  droplevels() %>%
  left_join(allsitemd_clean)

# Add transect area information
df <- df %>%
  mutate(transect_num = if_else(is.na(transect_num), 1, transect_num)) %>%
  mutate(transect_area_m2 = case_when(
    dataset == "drm" ~ 10,    # DRM belt transects were 10m
    dataset == "tt23" ~ 20,     # TT23 belt transects were 20m
    dataset == "tt21" ~ 30,     # TT21 belt transects were 30m
    dataset == "dca" ~ 30     # DCA belt transects were 30m
  ))

Can ‘Artificial’ be aggregated with ‘Nearshore Ridge Complex’?

‘Artificial’ is only present in DCA and minorly in TT – but absent from Shedd.

It is in close spatial proximity to Nearshore Ridge Complex – can these be combined?

Test for differences in coral density in DCA survey

# Subset DCA data
dcadf <- dca_counts_ag %>%
  left_join(allsitemd_clean) %>%
  filter(Type %in% c("Nearshore Ridge Complex", "Inner Reef", "Middle Reef", "Artificial"))

# Fit a Negative Binomial GLM
mod_nb <- MASS::glm.nb(n ~ Type, data = dcadf)

# Generate new data only for existing taxon-size class combinations
newdata_1 <- dcadf %>%
  distinct(Type)

# Get predicted values & standard errors (log scale)
preds_nb <- predict(mod_nb, newdata_1, type = "link", se.fit = TRUE)

# Compute both total coral density & taxon-size class-specific densities in one step
results_nb <- newdata_1 %>%
  mutate(
    fit = exp(preds_nb$fit),                    # Convert fitted values to response scale
    fit_se = exp(preds_nb$fit) * preds_nb$se.fit,   # Convert SE using the Delta Method
    fit_var = (fit * preds_nb$se.fit)^2,         # Variance propagation
    fit_lower = exp(preds_nb$fit - 1.96 * preds_nb$se.fit),  # Lower CI
    fit_upper = exp(preds_nb$fit + 1.96 * preds_nb$se.fit)   # Upper CI
  )

# Compute total coral density + confidence intervals
total_ci_nb <- results_nb %>%
  group_by(Type) %>%
  summarize(
    total_density = sum(fit),
    total_se = sqrt(sum(fit_var)),
    lower_95CI = exp(log(total_density) - 1.96 * (total_se / total_density)),
    upper_95CI = exp(log(total_density) + 1.96 * (total_se / total_density))
  )

knitr::kable(total_ci_nb)
Type total_density total_se lower_95CI upper_95CI
Artificial 1.7580645 0.2269451 1.3650635 2.264210
Inner Reef 1.3680352 0.1696949 1.0727781 1.744555
Middle Reef 0.8573201 0.0998316 0.6823733 1.077120
Nearshore Ridge Complex 1.9869432 0.1764332 1.6695542 2.364669

Highly overlapping confidence intervals for Artifical and Nearshore Ridge Complex, so no difference in coral density.

Test for differences in community composition in DCA survey

library(vegan)

# 1. Create a wide community matrix: rows = site x Type, columns = taxa
comm_matrix <- dcadf %>%
  group_by(site, Type, taxon) %>%
  summarize(total_n = sum(n), .groups = "drop") %>%
  pivot_wider(names_from = taxon, values_from = total_n, values_fill = 0)

# 2. Extract community matrix and metadata
comm_data <- comm_matrix %>% select(-site, -Type)
site_info <- comm_matrix %>% select(site, Type)

# 3. Run NMDS (k = 2 dimensions is standard)
set.seed(123)  # for reproducibility
nmds <- metaMDS(comm_data, distance = "bray", k = 2, trymax = 100)
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2267843 
## Run 1 stress 0.2276397 
## Run 2 stress 0.235479 
## Run 3 stress 0.2268499 
## ... Procrustes: rmse 0.006015082  max resid 0.04502679 
## Run 4 stress 0.2325294 
## Run 5 stress 0.2437078 
## Run 6 stress 0.2276398 
## Run 7 stress 0.2356379 
## Run 8 stress 0.2315451 
## Run 9 stress 0.2297249 
## Run 10 stress 0.2314156 
## Run 11 stress 0.2374012 
## Run 12 stress 0.235405 
## Run 13 stress 0.2327499 
## Run 14 stress 0.2314157 
## Run 15 stress 0.2267804 
## ... New best solution
## ... Procrustes: rmse 0.001160623  max resid 0.007358882 
## ... Similar to previous best
## Run 16 stress 0.2268231 
## ... Procrustes: rmse 0.002309085  max resid 0.018594 
## Run 17 stress 0.22764 
## Run 18 stress 0.2268497 
## ... Procrustes: rmse 0.006092397  max resid 0.04511697 
## Run 19 stress 0.2352389 
## Run 20 stress 0.2334834 
## *** Best solution repeated 1 times
# 4. Prepare data for plotting
scores_df <- scores(nmds)$sites %>%
  bind_cols(site_info)

# 5. Plot NMDS with ggplot2
ggplot(scores_df, aes(x = NMDS1, y = NMDS2, color = Type)) +
  geom_point(size = 3, alpha = 0.8) +
  theme_minimal() +
  labs(title = "NMDS of Coral Community Structure by Habitat Type",
       color = "Habitat Type")

High degree of similarity between Artificial and Nearshore Ridge Complex coral communities.

2.2.3. Combine ‘Artificial’ with ‘Nearshore Ridge Complex’

# Based on these results, combine "Artificial" with "Nearshore Ridge Complex"
df[df$Type == "Artificial", "Type"] <- "Nearshore Ridge Complex"

4.1 By coral taxa

Remove lowest abundance coral taxa

These will be problematic for statistical models

# Drop taxa with very few observations
sppcounts <- df %>%
  group_by(taxon) %>%
  summarize(total = sum(n), .groups = "drop") %>%
  arrange(total) 
sppcounts
## # A tibble: 22 × 2
##    taxon total
##    <chr> <int>
##  1 MANG      0
##  2 FFRA      1
##  3 HCUC      1
##  4 PAME      1
##  5 SCOL      2
##  6 OCUL      3
##  7 ORBI     26
##  8 MYCE     30
##  9 EFAS     35
## 10 MUSS     37
## # ℹ 12 more rows
dff <- df %>%
  filter(taxon %in% filter(sppcounts, total >= 5)$taxon) %>%
  mutate(Type = factor(Type, levels = type_levels))

Taxa with less than 5 total observations were filtered out, which included: HCUC, MANG, PAME, FFRA, OCUL, SCOL

5 Analyze coral density

5.1 Fit Bayesian negative binomial model

Predictors: dataset, habitat Type, direction from channel, taxon

# Get total counts for each taxon at each site
dfftaxon <- dff %>% 
  group_by(dataset, Type, dir, site, transect_num, transect_area_m2, taxon) %>%
  summarize(n = sum(n)) %>%
  ungroup()

# Remove any taxa not observed in a given dataset / habitat Type ?????
dfftaxon_trimmed <- dfftaxon %>%
  group_by(dataset, Type, dir, taxon) %>%
  filter(any(n > 0)) %>%
  ungroup()

# SUPER MODEL

# mod_nb <- brm(
#   bf(n ~ dataset * Type * dir * taxon + offset(log(transect_area_m2))),
#   family = negbinomial(),
#   data = dfftaxon_trimmed,
#   prior = c(prior(normal(0, 2), class = "b"),          # Weak prior on coefficients
#             prior(normal(0, 5), class = "Intercept"),  # Weak prior on intercept
#             prior(exponential(1), class = "shape")),     # Reasonable prior for NB dispersion
#   chains = 4,  
#   cores = 4,  
#   threads = threading(5),  
#   iter = 1000, warmup = 500,  
#   thin = 2,  
#   control = list(adapt_delta = 0.9, max_treedepth = 12),  
#   backend = "cmdstanr"
# )
# saveRDS(mod_nb, file = "data/processed/mod_nb.rds")
mod_nb <- readRDS("data/processed/mod_nb.rds")

# 1. Create newdata grid (1 m² for standardization)
newdata <- dfftaxon_trimmed %>%
  distinct(dataset, Type, dir, taxon) %>%
  mutate(transect_area_m2 = 1)

# 2. Get fitted values manually by computing summary statistics across all draws
posterior_draws <- add_epred_draws(mod_nb, newdata = newdata) %>%
  mutate(Type = factor(Type, levels = type_levels),
         dataset = factor(dataset, levels = dataset_levels))

5.2 Totals by survey, habitat, direction

# Compute and plot total coral abundance by dataset, habitat Type, and direction from channel
total_abund <- posterior_draws %>%
  group_by(.draw, dataset, Type, dir) %>%
  summarize(total_epred = sum(.epred), .groups = "drop") %>%
  group_by(dataset, Type, dir) %>%
  summarize(
    fit_mean = mean(total_epred),
    fit_sd = sd(total_epred),
    fit_lower = quantile(total_epred, 0.025),
    fit_upper = quantile(total_epred, 0.975),
    .groups = "drop"
  )

ggplot(total_abund, aes(x = dataset, y = fit_mean, color = dir)) +
  facet_grid(~ Type, labeller = as_labeller(type_labels)) +
  geom_point(position = position_dodge(width = 0.5)) +
  geom_errorbar(aes(ymin = fit_lower, ymax = fit_upper), width = 0,
                position = position_dodge(width = 0.5)) +
  scale_x_discrete(labels = dataset_labels) +
  theme(axis.text.x = element_text(angle = 90))

5.2.1 North-south differences?

# Any N-S differences in total coral abundance?
total_NS_diff <- posterior_draws %>%
  ungroup() %>%
  group_by(.draw, dataset, Type, dir) %>%
  summarize(total_epred = sum(.epred), .groups = "drop") %>%
  pivot_wider(names_from = dir, values_from = total_epred) %>%
  filter(!is.na(N), !is.na(S)) %>%  # make sure both are present
  mutate(diff = N - S)  # or S - N depending on desired contrast

total_NS_summ <- total_NS_diff %>%
  group_by(dataset, Type) %>%
  summarize(
    mean_diff = mean(diff),
    lower_95 = quantile(diff, 0.025),
    upper_95 = quantile(diff, 0.975),
    p_gt_0 = mean(diff > 0),
    p_lt_0 = mean(diff < 0),
    sig = ifelse(lower_95 > 0 | upper_95 < 0, "yes", "no"),
    .groups = "drop"
  )

total_NS_summ
## # A tibble: 16 × 8
##    dataset Type                  mean_diff lower_95 upper_95 p_gt_0 p_lt_0 sig  
##    <fct>   <fct>                     <dbl>    <dbl>    <dbl>  <dbl>  <dbl> <chr>
##  1 dca     Nearshore Ridge Comp…    -0.401   -0.925   0.106   0.062  0.938 no   
##  2 dca     Inner Reef               -0.506   -1.23    0.0934  0.044  0.956 no   
##  3 dca     Middle Reef              -0.328   -0.684  -0.0188  0.017  0.983 yes  
##  4 dca     Outer Reef               -0.491   -0.823  -0.174   0.001  0.999 yes  
##  5 dca     Aggregated Patch Reef     0.109   -0.440   0.721   0.642  0.358 no   
##  6 tt21    Nearshore Ridge Comp…    -1.96    -2.91   -1.33    0      1     yes  
##  7 tt21    Middle Reef              -0.859   -2.37    0.558   0.089  0.911 no   
##  8 tt21    Outer Reef               -0.417   -2.19    1.43    0.305  0.695 no   
##  9 tt21    Aggregated Patch Reef    -0.373   -1.69    1.03    0.226  0.774 no   
## 10 tt23    Nearshore Ridge Comp…     0.711    0.282   1.20    0.998  0.002 yes  
## 11 tt23    Inner Reef                0.130   -0.973   1.39    0.572  0.428 no   
## 12 tt23    Middle Reef              -0.390   -1.00    0.136   0.09   0.91  no   
## 13 tt23    Outer Reef               -0.878   -1.70   -0.115   0.014  0.986 yes  
## 14 drm     Nearshore Ridge Comp…     1.45     0.486   2.63    0.999  0.001 yes  
## 15 drm     Inner Reef               -0.108   -0.950   0.831   0.386  0.614 no   
## 16 drm     Middle Reef               1.03    -0.228   2.32    0.953  0.047 no

5.2.2 Survey differences?

# 1. Sum predicted values across taxa (total coral density per draw × dataset × Type × dir)
draws_total <- posterior_draws %>%
  mutate(dataset = as.character(dataset)) %>%
  group_by(.draw, dataset, Type, dir) %>%
  summarize(total_epred = sum(.epred), .groups = "drop")

# 2. Self-join to compare dataset pairs within each draw × Type × dir
pairwise_total <- draws_total %>%
  rename(dataset1 = dataset, epred1 = total_epred) %>%
  inner_join(
    draws_total %>% rename(dataset2 = dataset, epred2 = total_epred),
    by = c(".draw", "Type", "dir")
  ) %>%
  filter(dataset1 < dataset2) %>%
  mutate(diff = epred1 - epred2)

# 3. Summarize posterior contrasts
summary_total_contrasts <- pairwise_total %>%
  group_by(Type, dir, dataset1, dataset2) %>%
  summarize(
    mean_diff = mean(diff),
    lower = quantile(diff, 0.025),
    upper = quantile(diff, 0.975),
    prob_diff = mean(diff > 0),
    .groups = "drop"
  )

# 4. Optional: filter strong differences
summary_total_contrasts %>%
  filter(prob_diff < 0.01 | prob_diff > 0.99) %>%
  arrange(Type, dir)
## # A tibble: 18 × 8
##    Type               dir   dataset1 dataset2 mean_diff  lower   upper prob_diff
##    <fct>              <chr> <chr>    <chr>        <dbl>  <dbl>   <dbl>     <dbl>
##  1 Nearshore Ridge C… N     dca      drm         -2.10  -3.26  -1.18       0    
##  2 Nearshore Ridge C… N     dca      tt21         1.70   1.36   2.09       1    
##  3 Nearshore Ridge C… N     drm      tt21         3.80   2.96   4.89       1    
##  4 Nearshore Ridge C… N     drm      tt23         1.94   1.01   3.10       1    
##  5 Nearshore Ridge C… N     tt21     tt23        -1.85  -2.27  -1.52       0    
##  6 Nearshore Ridge C… S     dca      tt23         0.959  0.493  1.45       1    
##  7 Nearshore Ridge C… S     drm      tt23         1.20   0.744  1.74       1    
##  8 Nearshore Ridge C… S     tt21     tt23         0.814  0.152  1.79       0.991
##  9 Inner Reef         N     dca      drm         -1.09  -1.95  -0.394      0    
## 10 Inner Reef         N     dca      tt23        -0.926 -2.06  -0.0922     0.007
## 11 Middle Reef        N     dca      drm         -1.84  -2.95  -1.06       0    
## 12 Middle Reef        N     dca      tt23        -0.621 -1.05  -0.273      0    
## 13 Middle Reef        N     drm      tt23         1.21   0.364  2.34       0.999
## 14 Middle Reef        S     dca      tt23        -0.683 -1.28  -0.178      0.004
## 15 Outer Reef         N     dca      tt21        -1.68  -3.08  -0.770      0    
## 16 Outer Reef         N     dca      tt23        -0.784 -1.28  -0.340      0.001
## 17 Outer Reef         S     dca      tt21        -1.61  -3.26  -0.491      0    
## 18 Outer Reef         S     dca      tt23        -1.17  -1.95  -0.542      0

5.3 Taxon totals by survey, habitat, direction from channel

fitted_taxon <- posterior_draws %>%
  group_by(dataset, Type, dir, taxon) %>%
  summarize(fit_mean = mean(.epred),  # Posterior mean (expected value)
            fit_sd = sd(.epred),  # Posterior standard deviation
            fit_lower = quantile(.epred, 0.025),        # 2.5% quantile (lower CI)
            fit_upper = quantile(.epred, 0.975))    # 97.5% quantile (upper CI)

# Plot
ggplot(fitted_taxon, aes(y = fit_mean, x = Type, color = dir, shape = dataset,
                          group = interaction(dataset, dir))) +
  facet_wrap(taxon ~ ., scales = "free_x") +
  geom_point(position = position_dodge(width = 0.5)) +
  geom_line(position = position_dodge(width = 0.5), alpha = 0.5) +
  geom_errorbar(aes(ymin = fit_lower, ymax = fit_upper), width = 0,
                position = position_dodge(width = 0.5), lwd = 0.25) +
  scale_y_log10() +
  scale_x_discrete(labels = type_labels) +
  labs(x = "Habitat Type", y = "Density (per m2)")

5.3.1 North-south differences?

# Any N-S differences in taxon abundance within habitat Types/datasets?
#### Pivot wide so you can calculate N vs S difference per draw
taxon_NS_diff <- posterior_draws %>%
  ungroup() %>%
  select(.draw, dataset, Type, dir, taxon, .epred) %>%
  pivot_wider(names_from = dir, values_from = .epred) %>%
  filter(!is.na(N), !is.na(S)) %>%  # ensure both directions exist for the draw
  mutate(diff = S - N)  # or N - S depending on interpretation

taxon_NS_summ <- taxon_NS_diff %>%
  group_by(dataset, Type, taxon) %>%
  summarize(
    mean_diff = mean(diff),
    lower_95 = quantile(diff, 0.025),
    upper_95 = quantile(diff, 0.975),
    p_gt_0 = mean(diff > 0),
    p_lt_0 = mean(diff < 0),
    sig = ifelse(lower_95 > 0 | upper_95 < 0, "yes", "no"),
    .groups = "drop"
  )

taxon_NS_summ %>%
  filter(sig == "yes") %>%
  mutate(greater = if_else(mean_diff > 0, "N", "S")) %>%
  group_by(dataset, Type, greater) %>%
  summarize(taxa = paste(taxon, collapse = ",")) %>%
  arrange(Type, greater)
## # A tibble: 10 × 4
## # Groups:   dataset, Type [8]
##    dataset Type                    greater taxa               
##    <fct>   <fct>                   <chr>   <chr>              
##  1 dca     Nearshore Ridge Complex N       DSTO,MCAV,MEAN,SINT
##  2 drm     Nearshore Ridge Complex N       MCAV               
##  3 dca     Nearshore Ridge Complex S       ACER,FAVI,SOLE     
##  4 tt23    Nearshore Ridge Complex S       FAVI,PORI,SIDE,SOLE
##  5 drm     Nearshore Ridge Complex S       SIDE               
##  6 tt23    Inner Reef              S       PORI               
##  7 dca     Middle Reef             N       MADR,PORI          
##  8 tt23    Middle Reef             N       AGAR,MYCE,SINT     
##  9 dca     Outer Reef              N       DSTO,SIDE          
## 10 tt23    Outer Reef              N       MADR,SINT

5.3.2 Survey differences?

# Ensure dataset is character so we can do < comparison
draws_clean <- posterior_draws %>%
  mutate(dataset = as.character(dataset))

# One self-join to get all unique dataset pairs
pairwise_contrasts <- draws_clean %>%
  rename(dataset1 = dataset, epred1 = .epred) %>%
  inner_join(
    draws_clean %>% rename(dataset2 = dataset, epred2 = .epred),
    by = c(".draw", "taxon", "dir", "Type")
  ) %>%
  filter(dataset1 < dataset2) %>%  # Avoid duplicates and self-pairs
  mutate(diff = epred1 - epred2)

# Step: summarize the posterior differences
summary_contrasts <- pairwise_contrasts %>%
  group_by(taxon, dir, Type, dataset1, dataset2) %>%
  summarize(mean_diff = mean(diff),
            lower = quantile(diff, 0.025),
            upper = quantile(diff, 0.975),
            prob_diff = mean(diff > 0),
            .groups = "drop")

out <- summary_contrasts %>% 
  filter(prob_diff < 0.01) %>%
  arrange(taxon, Type, dir)
out
## # A tibble: 33 × 9
##    taxon dir   Type       dataset1 dataset2 mean_diff   lower    upper prob_diff
##    <chr> <chr> <fct>      <chr>    <chr>        <dbl>   <dbl>    <dbl>     <dbl>
##  1 ACER  S     Nearshore… dca      drm        -0.0250 -0.0515 -0.00722     0.003
##  2 AGAR  S     Middle Re… dca      tt23       -0.0443 -0.0950 -0.0111      0.004
##  3 AGAR  N     Outer Reef dca      tt21       -0.0734 -0.164  -0.0178      0.002
##  4 DSTO  N     Nearshore… dca      tt23       -0.0276 -0.0507 -0.0106      0.001
##  5 FAVI  S     Nearshore… dca      drm        -0.0318 -0.0602 -0.00928     0.002
##  6 MADR  N     Outer Reef dca      tt21       -0.0930 -0.230  -0.00984     0.005
##  7 MCAV  N     Nearshore… dca      tt23       -0.125  -0.192  -0.0725      0    
##  8 MCAV  N     Nearshore… drm      tt23       -0.133  -0.201  -0.0762      0    
##  9 MCAV  N     Inner Reef dca      tt23       -0.390  -0.862  -0.118       0    
## 10 MCAV  N     Middle Re… dca      drm        -0.177  -0.349  -0.0333      0.007
## # ℹ 23 more rows

6 Overall taxon differences among surveys

# Proportion of transect area per dataset × Type × dir
survey_weights <- dfftaxon %>%
  group_by(dataset, Type, dir) %>%
  summarize(area = sum(transect_area_m2), .groups = "drop") %>%
  group_by(dataset) %>%
  mutate(weight = area / sum(area)) %>%
  select(dataset, Type, dir, weight)


posterior_weighted <- posterior_draws %>%
  mutate(dataset = as.character(dataset)) %>%
  left_join(survey_weights, by = c("dataset", "Type", "dir")) %>%
  mutate(weighted_epred = .epred * weight)


posterior_totals <- posterior_weighted %>%
  group_by(.draw, dataset, taxon) %>%
  summarize(total_epred = sum(weighted_epred), .groups = "drop")


pairwise_overall <- posterior_totals %>%
  rename(dataset1 = dataset, epred1 = total_epred) %>%
  inner_join(
    posterior_totals %>% rename(dataset2 = dataset, epred2 = total_epred),
    by = c(".draw", "taxon")
  ) %>%
  filter(dataset1 < dataset2) %>%
  mutate(diff = epred1 - epred2)

summary_overall <- pairwise_overall %>%
  group_by(taxon, dataset1, dataset2) %>%
  summarize(mean_diff = mean(diff),
            lower = quantile(diff, 0.025),
            upper = quantile(diff, 0.975),
            prob_diff = mean(diff > 0),
            .groups = "drop")


summary_overall %>%
  filter(upper < 0 | lower > 0) %>%
  print(n = nrow(.))
## # A tibble: 44 × 7
##    taxon dataset1 dataset2 mean_diff      lower     upper prob_diff
##    <chr> <chr>    <chr>        <dbl>      <dbl>     <dbl>     <dbl>
##  1 ACER  dca      drm       -0.0137  -0.0241    -0.00529      0    
##  2 ACER  dca      tt21       0.00646  0.00349    0.00952      1    
##  3 ACER  drm      tt21       0.0201   0.0117     0.0309       1    
##  4 AGAR  dca      tt21      -0.0132  -0.0323    -0.000357     0.022
##  5 AGAR  drm      tt21      -0.0174  -0.0365    -0.00223      0.014
##  6 DSTO  dca      drm       -0.0121  -0.0230    -0.00333      0.002
##  7 DSTO  dca      tt23      -0.00699 -0.0149    -0.000124     0.023
##  8 EFAS  dca      tt23      -0.00410 -0.00849   -0.000950     0.006
##  9 FAVI  dca      drm       -0.0223  -0.0372    -0.0106       0    
## 10 FAVI  dca      tt23       0.00835  0.00211    0.0141       0.993
## 11 FAVI  drm      tt21       0.0231   0.00725    0.0403       0.993
## 12 FAVI  drm      tt23       0.0307   0.0188     0.0449       1    
## 13 MADR  dca      drm        0.0267   0.0164     0.0378       1    
## 14 MADR  drm      tt21      -0.0481  -0.0858    -0.0239       0    
## 15 MADR  drm      tt23      -0.0176  -0.0275    -0.00691      0.004
## 16 MADR  tt21     tt23       0.0305   0.00666    0.0684       0.997
## 17 MCAV  dca      tt23      -0.0488  -0.0980    -0.00167      0.023
## 18 MCAV  drm      tt23      -0.0697  -0.121     -0.0173       0.005
## 19 MEAN  dca      drm        0.0215   0.0142     0.0282       1    
## 20 MEAN  dca      tt21       0.0183   0.00805    0.0268       0.997
## 21 MEAN  dca      tt23       0.0162   0.00873    0.0234       1    
## 22 MUSS  dca      drm       -0.00442 -0.00980   -0.000253     0.013
## 23 MUSS  dca      tt21      -0.00600 -0.0153    -0.000424     0.019
## 24 MUSS  dca      tt23       0.00210  0.0000473  0.00408      0.975
## 25 MUSS  drm      tt23       0.00653  0.00263    0.0118       1    
## 26 MUSS  tt21     tt23       0.00810  0.00265    0.0176       0.999
## 27 MYCE  dca      tt23      -0.00309 -0.00683   -0.000279     0.017
## 28 ORBI  dca      tt21      -0.00825 -0.0174    -0.00225      0    
## 29 ORBI  dca      tt23       0.00145  0.0000286  0.00297      0.976
## 30 ORBI  drm      tt23       0.00390  0.000635   0.00942      0.993
## 31 ORBI  tt21     tt23       0.00970  0.00388    0.0191       1    
## 32 PORI  dca      drm       -0.127   -0.207     -0.0520       0    
## 33 PORI  drm      tt23       0.115    0.0250     0.210        0.989
## 34 SIDE  dca      drm       -0.970   -1.29      -0.689        0    
## 35 SIDE  dca      tt23      -0.184   -0.381     -0.00106      0.023
## 36 SIDE  drm      tt21       0.713    0.294      1.11         0.999
## 37 SIDE  drm      tt23       0.786    0.474      1.12         1    
## 38 SINT  dca      drm        0.0842   0.0313     0.134        0.997
## 39 SINT  drm      tt21      -0.162   -0.288     -0.0662       0    
## 40 SINT  drm      tt23      -0.131   -0.193     -0.0707       0    
## 41 SOLE  dca      drm       -0.0172  -0.0327    -0.00491      0.002
## 42 SOLE  dca      tt23      -0.0147  -0.0245    -0.00511      0.002
## 43 SOLE  drm      tt21       0.0180   0.000864   0.0355       0.977
## 44 SOLE  tt21     tt23      -0.0155  -0.0280    -0.000749     0.02

7 Total corals in impact zones

7.1 4.1. Calculate area of each habitat type in each impact zone

impact_zones <- st_read("data/Impact_zones.kml") %>%
  st_zm(drop = TRUE, what = "ZM") %>%
  st_make_valid() %>%
  st_transform(st_crs(polygons_clean))
## Reading layer `Impact_zones' from data source 
##   `/Users/rosscunning/Projects/PENIP/data/Impact_zones.kml' using driver `KML'
## Simple feature collection with 9 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY, XYZ
## Bounding box:  xmin: -80.10538 ymin: 26.08289 xmax: -80.08298 ymax: 26.1069
## z_range:       zmin: -21.40724 zmax: 6.930363e-14
## Geodetic CRS:  WGS 84
impact_zones <- impact_zones %>%
  rename(ImpactZone = Name)  # or whatever column contains zone names

library(ggplot2)

impact_zones_plot <- impact_zones %>%
  mutate(linetype_group = "Impact Zone")

ggplot() +
  # Habitat polygons
  geom_sf(data = polygons_clean, aes(fill = Type), color = "black", size = 0.2, alpha = 0.6) +
  
  # Impact zones with dummy linetype for legend
  geom_sf(data = impact_zones_plot, aes(linetype = linetype_group), 
          fill = NA, color = "black", linewidth = 0.6, show.legend = TRUE) +
  
  # Color scales
  scale_fill_brewer(palette = "Set3", na.value = "gray80") +
  scale_linetype_manual(name = "", values = c("Impact Zone" = "dashed")) +
  
  # Theme and layout
  theme_minimal() +
  labs(title = "Habitat Polygons within Impact Zones", fill = "Type") +
  theme(legend.position = "right") +
  xlim(-80.11, -80.079) +
  ylim(26.08, 26.11)

# Spatial intersection of habitat polygons with impact zones
habitat_in_zones <- st_intersection(polygons_clean, impact_zones)


# Use a projected CRS for accurate area (e.g., UTM Zone 17N for South Florida)
habitat_in_zones_proj <- habitat_in_zones %>%
  st_transform(32617) %>%
  mutate(area_m2 = st_area(geometry))


# Adjust column names depending on your Impact Zones KML
area_summary <- habitat_in_zones_proj %>%
  st_drop_geometry() %>%
  group_by(Type, ImpactZone) %>%
  summarize(total_area_m2 = sum(as.numeric(area_m2)), .groups = "drop")


area_summary[area_summary$Type == "Artificial", "Type"] <- "Nearshore Ridge Complex"

area_summary <- area_summary %>%
  group_by(Type, ImpactZone) %>%
  summarize(total_area_m2 = sum(total_area_m2))

7.2 4.2. Calculate total corals in each impact zone

area_summary <- area_summary %>%
  mutate(ImpactZone = factor(ImpactZone, 
    levels = c("Channel", "Side Slopes", "Scenario 2, > 10 cm", "Scenario 2, 5.1-10 cm",
               "Scenario 4, 1.1-5 cm", "Scenario 4, 0.51-1 cm", "Scenario 4, 0.1-0.5 cm")))

totals <- left_join(total_abund, area_summary, by = "Type") %>%
  mutate(tot_estimate = fit_mean * total_area_m2) %>%
  mutate(Type = factor(Type, levels = type_levels)) %>%
  select(dataset, Type, ImpactZone, tot_estimate)

# Define ordered impact zone levels
impact_levels <- c("Channel", "Side Slopes", "Scenario 2, > 10 cm", "Scenario 2, 5.1-10 cm",
                   "Scenario 4, 1.1-5 cm", "Scenario 4, 0.51-1 cm", "Scenario 4, 0.1-0.5 cm")

# Ensure correct factor order
totals <- totals %>%
  mutate(ImpactZone = factor(ImpactZone, levels = impact_levels))

# Cumulative summaries across increasing levels
cumulative_zones <- seq_along(impact_levels) %>%
  map_dfr(function(i) {
    zone_subset <- impact_levels[1:i]

    totals %>%
      drop_na(dataset) %>%
      filter(ImpactZone %in% zone_subset) %>%
      group_by(dataset, Type) %>%
      summarize(tot = sum(tot_estimate), .groups = "drop") %>%
      pivot_wider(names_from = dataset, values_from = tot) %>%
      mutate(zone_group = paste0("z", i))
  })



cumulative_zones %>%
  filter(zone_group == "z7") %>%
  pivot_longer(2:5, names_to = "dataset") %>%
  mutate(dataset = factor(dataset, levels = dataset_levels)) %>%
  ggplot(aes(x = Type, y = value, fill = dataset)) +
  geom_col(position = position_dodge()) +
  scale_fill_discrete(labels = dataset_labels) +
  labs(x = "", y = "Total corals", title = "Total corals in all impact zones, by habitat")

cumulative_zones %>%
  filter(zone_group == "z7") %>%
  pivot_longer(2:5, names_to = "dataset") %>%
  group_by(dataset) %>%
  summarize(value = sum(value, na.rm = T)) %>%
  mutate(dataset = factor(dataset, levels = dataset_levels)) %>%
  ggplot(aes(x = 1, y = value, fill = dataset)) +
  geom_col(position = position_dodge()) +
  scale_fill_discrete(labels = dataset_labels) +
  labs(x = "", y = "Total corals", title = "Total corals in all impact zones - all habitats")